* Positional arguments of this do-file:
*   1  project root directory, stored in global root_dir
*   2  stored in global weight_category
*   3  stored in global weight_versions
*   4  optional; when non-empty it replaces global weight_window
*   5  optional; when non-empty it replaces global wtype
global root_dir = "`1'"

* Load the project configuration. It is included (not run) so that it shares this
* file's macro scope. Presumably it defines log_dir, final_dir, ttt and the defaults
* for weight_window and wtype; none of them is set in this file - verify in config.do.
include "$root_dir/code/config/config.do"


* Close a log named dat that an interrupted earlier run may have left open; otherwise
* the log using below fails (silently, because it is captured) and no fresh log is started.
cap log close dat
* The path is quoted so that a log_dir containing spaces still works.
cap noi log using "${log_dir}/regression_firms.log", replace name(dat)

global weight_category "`2'"
di "${weight_category}"

global weight_versions "`3'"
di "${weight_versions}"

* Override the weight window coming from the config only if a fourth argument was passed
if "`4'" != "" {
    global weight_window "`4'"
}
di "${weight_window}"

* Override wtype coming from the config only if a fifth argument was passed
if "`5'" != "" {
    global wtype "`5'"
}
di "${wtype}"

capture noi {


/*
This do-file stores firm lists for the baseline regression with three different
dependent variables (auto95, auto90 and ppauto90). For each of them it reloads the
regression dataset, runs the baseline ppmlhdfe regression, keeps the observations of
the estimation sample and saves the distinct BvD identifiers to
bvd_list_regfirms_<name>.dta in the directory given by global final_dir.
It has to be run directly after the main file in order to allow other files to run.

Notes:
 - auto95 must stay first in the list: the ppauto90 iteration reads the auto95 firm
   list that is written earlier in this same loop.
 - Global depvar is overwritten in every iteration and is left at pauto90 once the
   loop has finished.
 - Global ttt is not set in this file; presumably it comes from the config.
 - An error inside the capture block is displayed but does not abort the do-file;
   it is only reported by the status message after the block.
 - Only the variables that enter the regression are constructed, because everything
   except BvD is discarded before saving.
*/

local depvars auto95 auto90 ppauto90

foreach dv of local depvars {

    use "${final_dir}/regression_dataset_from1970_tfacit1.dta", clear
    * overwriting the depvar from the config inside the loop
    global depvar `dv'

    if "`dv'" == "ppauto90" {
        * keep only firms that are on the auto95 firm list, then estimate with pauto90
        mmerge BvD using "${final_dir}/bvd_list_regfirms_auto95.dta", unmatched(master)
        * full variable name: does not depend on variable abbreviation being available
        keep if _merge == 3
        global depvar pauto90
    }

    estimates clear

    * short aliases for the regressors, for convenience
    clonevar LSW    = lswMPm_1995_a
    clonevar HSW    = hswMPm_1995_a
    clonevar GDPGAP = lngdpgap_1995_a
    clonevar spilloversown       = spill${depvar}${ttt}_1995_a
    clonevar spilloversother     = spillN${depvar}${ttt}_1995_a
    clonevar spilloversownzero   = spill${depvar}${ttt}_1995_a0
    clonevar spilloversotherzero = spillN${depvar}${ttt}_1995_a0
    clonevar stockown       = k${depvar}_${ttt}
    clonevar stockownzero   = k${depvar}_${ttt}0
    clonevar stockother     = kNOT_${depvar}_${ttt}
    clonevar stockotherzero = kNOT_${depvar}_${ttt}0

    * add patent restriction: per-firm sum of the dependent variable over the years
    * 1997-2011, copied to every observation of the firm.
    * The first egen is missing outside that year window, the second one spreads the
    * firm-level value to all rows.
    bys lse_id : egen _total_${depvar}_${ttt}_1995 = total(${depvar}_${ttt}) if year >= 1995+2 & year <= 2009+2
    bys lse_id : egen total_${depvar}_${ttt}_1995 = max(_total_${depvar}_${ttt}_1995)
    drop _total_${depvar}_${ttt}_1995

    * add FEs: industry-by-year cells, defined only for year <= 2009
    * NOTE(review): observations after 2009 have a missing yearindustry and are
    * presumably excluded from the estimation through absorb() - confirm.
    egen yearindustry = group(year industry) if year <= 2009
    sort lse_id year

    * run baseline regression, extract a list of firms
    * NOTE(review): the F2. lead operator needs the data to be xtset/tsset; that is
    * not done in this file, so it is presumably stored in the dataset - confirm.
    ppmlhdfe F2.${depvar}_${ttt}                                        ///
        LSW HSW GDPGAP                                                  ///
        stockown stockownzero stockother stockotherzero                 ///
        spilloversown spilloversownzero                                 ///
        spilloversother spilloversotherzero                             ///
        if year >= 1995                                                 ///
         & missing_weights_1995 == 0                                    ///
         & missing_spill_weights_1995 == 0                              ///
         & maxweight_1995 < 1                                           ///
         & total_${depvar}_${ttt}_1995 > 0,                             ///
        absorb(lse_id yearindustry) vce(cluster lse_id)

    * keep the estimation sample and reduce it to one row per firm identifier
    keep if e(sample)
    keep BvD
    duplicates drop
    save "${final_dir}/bvd_list_regfirms_`dv'.dta", replace
}

}
* _rc holds the return code of the capture block above
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

cap log close dat